suppressPackageStartupMessages({
  library(epiwraps)
  library(rtracklayer)
  library(GenomicRanges)
  library(ggplot2)
  library(ComplexHeatmap)
  library(chromVAR)
  library(motifmatchr)
  library(memes)
  library(MotifDb)
  library(universalmotif)
  library(TFBSTools)
  library(AnnotationHub)
  library(RColorBrewer)
  library(viridis)
  library(viridisLite)
  library(cowplot)
})
## Possible Ensembl SSL connectivity problems detected.
## Please see the 'Connection Troubleshooting' section of the biomaRt vignette
## vignette('accessing_ensembl', package = 'biomaRt')Error in curl::curl_fetch_memory(url, handle = handle) : 
##   Peer certificate cannot be authenticated with given CA certificates: [uswest.ensembl.org] SSL certificate problem: certificate has expired
## See system.file("LICENSE", package="MotifDb") for use restrictions.
theme_set(theme_bw())
set.seed(123)

load ATAC data for TDG_KO

bamfiles_ctrl <- list.files("../Tdg_wt/aligned/",pattern = "^Kg_tdg_M(.*)\\.bam$", full =TRUE)
bamfiles_TDG <- list.files("../Tdg_wt/aligned/",pattern = "^Kg_tdg_ko(.*)\\.bam$", full =TRUE)

names(bamfiles_ctrl) <- c("wt_1", "wt_2")
names(bamfiles_TDG) <- c("tdg +/- 1", "tdg +/- 2","tdg +/- 3")
Ctrl_Full_bigwig <- list.files("../Tdg_wt//tracks/",pattern = "^Kg_tdg_M(.*).bw$", full =TRUE)

Tdg_Full_bigwig <- list.files("../Tdg_wt//tracks/",pattern = "^Kg_tdg_ko(.*).bw$", full =TRUE)

give names to the tracks

names(Ctrl_Full_bigwig)<- c("wt_1", "wt_2")

names(Tdg_Full_bigwig) <- c(c("tdg +/- 1", "tdg +/- 2","tdg +/- 3"))

total_bw_list <- c(Ctrl_Full_bigwig,Tdg_Full_bigwig)

load peaks

peaks

peaks_TDG_BP <- c(TDG_KO1_BP = rtracklayer::import("../Tdg_wt/peaks/peaks/Kg_tdg_ko_1_EKDL220000948-1a_H22J7DSX3_L2_peaks.broadPeak"), TDG_KO2_BP = rtracklayer::import("../Tdg_wt/peaks/peaks/Kg_tdg_ko_2_EKDL220003833-1a_HJ5T2DSX3_L1_peaks.broadPeak"),TDG_KO3_BP = rtracklayer::import("../Tdg_wt/peaks/peaks/Kg_tdg_ko_3_EKDL220003834-1a_HJ5T2DSX3_L1_peaks.broadPeak"))
peaks_Ctrl_BP <- c(Control_1 = rtracklayer::import("../Tdg_wt/peaks/peaks/Kg_tdg_M_1_EKDL220003835-1a_HJ5T2DSX3_L1_peaks.broadPeak"), Control_2 = rtracklayer::import("../Tdg_wt/peaks/peaks/Kg_tdg_M_3_EKDL220003836-1a_HJ5T2DSX3_L1_peaks.broadPeak"))

blacklist

blacklist <- import("../object_resources/mm10.blacklist_chrM.bed")
blacklist
## GRanges object with 165 ranges and 0 metadata columns:
##         seqnames              ranges strand
##            <Rle>           <IRanges>  <Rle>
##     [1]     chr1   24612621-24612850      *
##     [2]     chr1   48881431-48881690      *
##     [3]     chr1   58613871-58614090      *
##     [4]     chr1   78573921-78574140      *
##     [5]     chr1   88217961-88221950      *
##     ...      ...                 ...    ...
##   [161]     chr9   24541941-24542200      *
##   [162]     chr9   35305121-35305620      *
##   [163]     chr9 110281191-110281400      *
##   [164]     chr9 123872951-123873160      *
##   [165]     chrM             2-16299      *
##   -------
##   seqinfo: 20 sequences from an unspecified genome; no seqlengths
seqlevelsStyle(blacklist) <- "ensembl"

get consensus peaks

###total
x <- c(peaks_Ctrl_BP,peaks_TDG_BP)
total_consensus_peaks <- reduce(unlist(GRangesList(x)), with.revmap=TRUE)
y <- keepStandardChromosomes(total_consensus_peaks, pruning.mode = "coarse")
total_consensus_peaks <- granges(y[lengths(y$revmap)>1])
seqlevelsStyle(blacklist) <- "ensembl"
## Warning in .replace_seqlevels_style(x_seqlevels, value): found more than one
## best sequence renaming map compatible with seqname style "ensembl" for this
## object, using the first one
total_consensus_peaks <- total_consensus_peaks[!overlapsAny(total_consensus_peaks, blacklist)]# thats how you get rid of blacklisted peaks!!!!
total_consensus_peaks
## GRanges object with 34791 ranges and 0 metadata columns:
##           seqnames            ranges strand
##              <Rle>         <IRanges>  <Rle>
##       [1]        1   3193206-3194116      *
##       [2]        1   3371834-3373144      *
##       [3]        1   3670571-3672240      *
##       [4]        1   4256517-4258768      *
##       [5]        1   4326626-4326879      *
##       ...      ...               ...    ...
##   [34787]        Y 90806973-90807763      *
##   [34788]        Y 90810055-90813626      *
##   [34789]        Y 90819329-90819916      *
##   [34790]        Y 90825910-90828204      *
##   [34791]        Y 90835647-90836399      *
##   -------
##   seqinfo: 22 sequences from an unspecified genome; no seqlengths
total_consensus_peaks_2000 <- total_consensus_peaks[width(total_consensus_peaks)<=2000]

x <- c(peaks_TDG_BP)
TDG_consensus_peaks <- reduce(unlist(GRangesList(x)), with.revmap=TRUE)
y <- keepStandardChromosomes(TDG_consensus_peaks, pruning.mode = "coarse")
TDG_consensus_peaks <- granges(y[lengths(y$revmap)>1]) #for standard chromosomes use keep this line of code
seqlevelsStyle(blacklist) <- "ensembl"
## Warning in .replace_seqlevels_style(x_seqlevels, value): found more than one
## best sequence renaming map compatible with seqname style "ensembl" for this
## object, using the first one
TDG_consensus_peaks <- TDG_consensus_peaks[!overlapsAny(TDG_consensus_peaks, blacklist)]# thats how you get rid of blacklisted peaks!!!!
TDG_consensus_peaks
## GRanges object with 7735 ranges and 0 metadata columns:
##          seqnames            ranges strand
##             <Rle>         <IRanges>  <Rle>
##      [1]        1   3193206-3194116      *
##      [2]        1   4256517-4258752      *
##      [3]        1   4559822-4560973      *
##      [4]        1   4766599-4766942      *
##      [5]        1   4834934-4835688      *
##      ...      ...               ...    ...
##   [7731]        Y 90786836-90791666      *
##   [7732]        Y 90798938-90800732      *
##   [7733]        Y 90811286-90813626      *
##   [7734]        Y 90819329-90819916      *
##   [7735]        Y 90825910-90828112      *
##   -------
##   seqinfo: 22 sequences from an unspecified genome; no seqlengths

import 5fC/no5fC sites

sites_5fC <- readRDS("../object_resources/5fC/sites_5fC.rds")
sites_no5fC <- readRDS("../object_resources/5fC/sites_no5fC.rds")

import TSS with and without 5fC in proximity

TSS_2KB_5fC <- readRDS("../object_resources/5fC/TSS_2KB_5fC.rds")
TSS_2KB_no5fC <- readRDS("../object_resources/5fC/TSS_2KB_no5fC.rds")

calculation of normfactor generation of normalization factors

# background normalization
bg_nf_full <- bwNormFactors(total_bw_list, method = "background")
## Warning: bwNormFactors is deprecated, please gradually switch to
## `getNormFactors`.
## Comparing coverage in random regions...

1 A

p1 <- fragSizesDist(x = c(bamfiles_ctrl, bamfiles_TDG),what = "1")+ xlim(0,600)+  scale_x_continuous(n.breaks = 22)+scale_color_brewer(palette = "Dark2")+theme(text = element_text(size = 12))+ggtitle("Fragment Size Distribution (Chr1)") 
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
p1 <- fragSizesDist(x = c(bamfiles_ctrl, bamfiles_TDG),what = "1")+ xlim(0,600)+scale_color_brewer(palette = "Dark2")+theme(text = element_text(size = 12))+ggtitle("Fragment Size Distribution (Chr1)") 
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
p1+theme(legend.text = element_text(face="italic"))
## Warning: Removed 337053 rows containing non-finite values (`stat_density()`).

# saveRDS(p1+theme(legend.text = element_text(face="italic")),"../object_resources/figure_6_sup_meta/p1.rds")  
# p1 <- readRDS("../object_resources/figure_6_sup_meta/p1.rds")

2 B

broad total consensus peaks

m_bp_total <- signal2Matrix(total_bw_list,regions = total_consensus_peaks_2000,w = 10L)
## Reading ../Tdg_wt//tracks//Kg_tdg_M_1_EKDL220003835-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_M_3_EKDL220003836-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_1_EKDL220000948-1a_H22J7DSX3_L2.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_2_EKDL220003833-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_3_EKDL220003834-1a_HJ5T2DSX3_L1.bw
m_bp_total_bg <- rescaleSignalMatrices(m_bp_total,bg_nf_full)
# saveRDS(m_bp_total_bg,"../object_resources/figure_6_sup_meta/m_bp_total_bg.rds")

#m_bp_total_bg <- readRDS("../object_resources/figure_6_sup_meta/m_bp_total_bg.rds")
names(m_bp_total_bg) <- c("wt 1","wt 2","Tdg +|- 1","Tdg +|- 2","Tdg +|- 3")
p2 <- plotEnrichedHeatmaps(m_bp_total_bg,raster_resize_mat = TRUE , row_title="Accessible regions",scale_title = "backgr. \nnorm.\ndensity", colors = rocket(100),column_title_gp=gpar(fontface = "italic"),row_title_gp = gpar(fontsize = 11),top_annotation = FALSE, use_raster = TRUE)
p2

# saveRDS(p2,"../object_resources/figure_6_sup_meta/p2.rds")
# p2 <- readRDS("../object_resources/figure_6_sup_meta/p2.rds")

3 C

m_5fC <- signal2Matrix(c(total_bw_list),w = 10L, regions = sites_5fC)
## Reading ../Tdg_wt//tracks//Kg_tdg_M_1_EKDL220003835-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_M_3_EKDL220003836-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_1_EKDL220000948-1a_H22J7DSX3_L2.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_2_EKDL220003833-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_3_EKDL220003834-1a_HJ5T2DSX3_L1.bw
m_5fC_bg <- rescaleSignalMatrices(m_5fC,bg_nf_full)
#saveRDS(m_5fC_bg,"../object_resources/figure_6_sup_meta/m_5fC_bg.rds")
#m_5fC_bg <- readRDS("../object_resources/figure_6_sup_meta/m_5fC_bg.rds")

plotting heatmaps

names(m_5fC_bg) <- c("wt 1","wt 2","Tdg +|- 1","Tdg +|- 2","Tdg +|- 3")
p3 <- plotEnrichedHeatmaps(m_5fC_bg,raster_resize_mat = TRUE, row_title="5fC sites",scale_title = "backgr. \nnorm. \ndensity",colors = rocket(100),column_title_gp=gpar(fontface = "italic"),row_title_gp = gpar(fontsize = 11),top_annotation = FALSE, use_raster = TRUE)
p3

# saveRDS(p3,"../object_resources/figure_6_sup_meta/p3.rds")
# p3 <- readRDS("../object_resources/figure_6_sup_meta/p3.rds")

4 D

generation of signal matrices for 5fC TSS

m_TSS_5fC <- signal2Matrix(total_bw_list,w = 10L, regions = TSS_2KB_5fC)
## Reading ../Tdg_wt//tracks//Kg_tdg_M_1_EKDL220003835-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_M_3_EKDL220003836-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_1_EKDL220000948-1a_H22J7DSX3_L2.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_2_EKDL220003833-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_3_EKDL220003834-1a_HJ5T2DSX3_L1.bw
m_TSS_5fC_bg <- rescaleSignalMatrices(m_TSS_5fC,bg_nf_full)
# saveRDS(m_TSS_5fC,"../object_resources/figure_6_sup_meta/m_TSS_5fC.rds")
# saveRDS(m_TSS_5fC_bg,"../object_resources/figure_6_sup_meta/m_TSS_5fC_bg.rds")
# m_TSS_5fC_bg <- readRDS("../object_resources/figure_6_sup_meta/m_TSS_5fC_bg.rds")

names(m_TSS_5fC_bg) <- c("wt 1","wt 2","Tdg +|- 1","Tdg +|- 2","Tdg +|- 3")

plotting heatmaps

p4 <- plotEnrichedHeatmaps(m_TSS_5fC_bg,raster_resize_mat = TRUE, row_title="TSS with 5fC",scale_title = "backgr. \nnorm.\ndensity", axis_name = c("-2kb","TSS","+2kb"),colors = rocket(100),column_title_gp=gpar(fontface = "italic"),row_title_gp = gpar(fontsize = 11),top_annotation = FALSE,use_raster = TRUE)
print(p4)

# saveRDS(p4,"../object_resources/figure_6_sup_meta/p4.rds")
# p4 <- readRDS("../object_resources/figure_6_sup_meta/p4.rds")

5 joint

pp <- plot_grid(p1,grid::grid.grabExpr(draw(p2)),grid::grid.grabExpr(draw(p3)),grid::grid.grabExpr(draw(p4)),nrow = 4,labels =c("A","B","C","D"),ncol = 1)
## Warning: Removed 337053 rows containing non-finite values (`stat_density()`).
pp

pdf("figure_6_sup_meta.pdf", width=9, height=8)
pp
dev.off()
## png 
##   2
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] cowplot_1.1.1          viridis_0.6.2          viridisLite_0.4.2     
##  [4] RColorBrewer_1.1-3     AnnotationHub_2.22.1   BiocFileCache_1.14.0  
##  [7] dbplyr_2.1.1           TFBSTools_1.28.0       universalmotif_1.8.5  
## [10] MotifDb_1.32.0         Biostrings_2.58.0      XVector_0.30.0        
## [13] memes_0.99.11          motifmatchr_1.12.0     chromVAR_1.12.0       
## [16] ggplot2_3.4.2          rtracklayer_1.50.0     epiwraps_0.99.72      
## [19] EnrichedHeatmap_1.29.2 ComplexHeatmap_2.15.1  GenomicRanges_1.42.0  
## [22] GenomeInfoDb_1.26.7    IRanges_2.24.1         S4Vectors_0.28.1      
## [25] BiocGenerics_0.36.1   
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.4                    R.utils_2.11.0               
##   [3] tidyselect_1.2.0              poweRlaw_0.70.6              
##   [5] RSQLite_2.2.11                AnnotationDbi_1.52.0         
##   [7] htmlwidgets_1.5.4             BiocParallel_1.24.1          
##   [9] munsell_0.5.0                 codetools_0.2-19             
##  [11] DT_0.22                       miniUI_0.1.1.1               
##  [13] withr_2.5.2                   colorspace_2.1-0             
##  [15] Biobase_2.50.0                highr_0.10                   
##  [17] knitr_1.45                    rstudioapi_0.15.0            
##  [19] labeling_0.4.3                Rdpack_2.6                   
##  [21] MatrixGenerics_1.2.1          GenomeInfoDbData_1.2.4       
##  [23] farver_2.1.1                  bit64_4.0.5                  
##  [25] vctrs_0.6.1                   generics_0.1.3               
##  [27] xfun_0.41                     biovizBase_1.38.0            
##  [29] ggseqlogo_0.1                 R6_2.5.1                     
##  [31] doParallel_1.0.17             splitstackshape_1.4.8        
##  [33] clue_0.3-65                   locfit_1.5-9.4               
##  [35] AnnotationFilter_1.14.0       bitops_1.0-7                 
##  [37] cachem_1.0.7                  DelayedArray_0.16.3          
##  [39] assertthat_0.2.1              promises_1.2.0.1             
##  [41] scales_1.2.1                  nnet_7.3-19                  
##  [43] gtable_0.3.3                  Cairo_1.6-0                  
##  [45] ensembldb_2.14.1              seqLogo_1.56.0               
##  [47] rlang_1.1.2                   GlobalOptions_0.1.2          
##  [49] splines_4.0.3                 lazyeval_0.2.2               
##  [51] dichromat_2.0-0.1             checkmate_2.3.0              
##  [53] BiocManager_1.30.20           yaml_2.3.7                   
##  [55] reshape2_1.4.4                GenomicFeatures_1.42.3       
##  [57] backports_1.4.1               httpuv_1.6.9                 
##  [59] Hmisc_4.6-0                   tools_4.0.3                  
##  [61] ellipsis_0.3.2                jquerylib_0.1.4              
##  [63] Rcpp_1.0.11                   plyr_1.8.9                   
##  [65] base64enc_0.1-3               progress_1.2.2               
##  [67] zlibbioc_1.36.0               purrr_1.0.1                  
##  [69] RCurl_1.98-1.13               prettyunits_1.2.0            
##  [71] rpart_4.1.21                  openssl_2.0.0                
##  [73] pbapply_1.7-2                 GetoptLong_1.0.5             
##  [75] GenomicFiles_1.26.0           SummarizedExperiment_1.20.0  
##  [77] cluster_2.1.4                 magrittr_2.0.3               
##  [79] magick_2.8.1                  data.table_1.14.8            
##  [81] circlize_0.4.15               ProtGenerics_1.22.0          
##  [83] matrixStats_1.1.0             hms_1.1.1                    
##  [85] mime_0.12                     evaluate_0.23                
##  [87] xtable_1.8-4                  XML_3.99-0.15                
##  [89] jpeg_0.1-10                   gridExtra_2.3                
##  [91] shape_1.4.6                   compiler_4.0.3               
##  [93] biomaRt_2.46.3                tibble_3.2.1                 
##  [95] crayon_1.5.2                  R.oo_1.25.0                  
##  [97] htmltools_0.5.7               later_1.3.1                  
##  [99] tzdb_0.3.0                    Formula_1.2-5                
## [101] tidyr_1.2.0                   DBI_1.1.3                    
## [103] MASS_7.3-60                   rappdirs_0.3.3               
## [105] Matrix_1.6-3                  readr_2.1.2                  
## [107] cli_3.6.1                     rbibutils_2.2.16             
## [109] R.methodsS3_1.8.2             Gviz_1.34.1                  
## [111] pkgconfig_2.0.3               GenomicAlignments_1.26.0     
## [113] TFMPvalue_0.0.8               foreign_0.8-84               
## [115] plotly_4.10.0                 xml2_1.3.5                   
## [117] foreach_1.5.2                 annotate_1.68.0              
## [119] bslib_0.3.1                   DirichletMultinomial_1.32.0  
## [121] stringr_1.5.0                 VariantAnnotation_1.36.0     
## [123] digest_0.6.33                 pracma_2.4.4                 
## [125] CNEr_1.26.0                   rmarkdown_2.13               
## [127] htmlTable_2.4.0               edgeR_3.32.1                 
## [129] curl_5.0.0                    shiny_1.7.1                  
## [131] Rsamtools_2.6.0               gtools_3.9.5                 
## [133] rjson_0.2.21                  lifecycle_1.0.4              
## [135] jsonlite_1.8.7                askpass_1.2.0                
## [137] limma_3.46.0                  BSgenome_1.58.0              
## [139] fansi_1.0.5                   pillar_1.9.0                 
## [141] lattice_0.21-8                KEGGREST_1.30.1              
## [143] fastmap_1.1.1                 httr_1.4.2                   
## [145] survival_3.3-1                GO.db_3.12.1                 
## [147] interactiveDisplayBase_1.28.0 glue_1.6.2                   
## [149] UpSetR_1.4.0                  png_0.1-8                    
## [151] iterators_1.0.14              BiocVersion_3.12.0           
## [153] bit_4.0.5                     stringi_1.8.1                
## [155] sass_0.4.1                    blob_1.2.2                   
## [157] latticeExtra_0.6-29           caTools_1.18.2               
## [159] memoise_2.0.1                 dplyr_1.1.1